from sklearn.metrics import precision_score, recall_score, f1_score


def loadWord2Vec(filename):
    """Load word embeddings from a tab-separated file.

    Each valid line holds a token followed by 100 float components
    (101 tab-separated fields); malformed lines are silently skipped.

    Args:
        filename: path to the UTF-8 embedding file.

    Returns:
        (vocab, embd, vocab_dic) where vocab is the token list, embd the
        parallel list of 100-dim float vectors, and vocab_dic maps each
        token to its row index.
    """
    vocab, embd, vocab_dic = [], [], {}
    with open(filename, encoding="utf8") as fh:
        for line in fh:
            fields = line.strip().split("\t")
            # token + 100 components; anything else is a malformed line
            if len(fields) != 101:
                continue
            token = fields[0]
            # index before appending == running row count
            vocab_dic[token] = len(vocab)
            vocab.append(token)
            embd.append([float(component) for component in fields[1:]])
    return vocab, embd, vocab_dic


def load_batch(batch_name, batch_idx, classes, sequence_len, word2id_dict,
               data_dir="../data/batch_data"):
    """Load one batch file and convert it to id sequences and one-hot labels.

    Reads ``<data_dir>/<batch_name>_<batch_idx>``, where each line is a
    binary label ("0" or "1") followed by tab-separated tokens.  Lines whose
    first field is neither "0" nor "1" are skipped.

    Args:
        batch_name: batch file name prefix.
        batch_idx: batch file name suffix (any stringifiable value).
        classes: number of classes for the one-hot label vector.
        sequence_len: fixed output length; sequences are truncated or
            padded with -1 to this length.
        word2id_dict: token -> id mapping; must contain "unk_" for
            out-of-vocabulary tokens (only looked up when one occurs).
        data_dir: directory containing the batch files (default keeps the
            original relative path, so existing callers are unaffected).

    Returns:
        (input_x, input_y): list of id sequences (each of length
        ``sequence_len``) and the parallel list of one-hot label vectors.
    """
    input_path = "%s/%s_%s" % (data_dir, batch_name, batch_idx)
    input_x = []
    input_y = []
    with open(input_path, encoding="utf8") as f:
        for line in f:
            ll = line.strip().split("\t")
            # Skip header/garbage lines: only binary labels are accepted.
            if ll[0] not in ("0", "1"):
                continue
            label = [0] * classes
            label[int(ll[0])] = 1
            input_y.append(label)
            # Truncate to at most sequence_len tokens, map to ids.
            # Conditional expression keeps the "unk_" lookup lazy: it is
            # only evaluated for genuinely out-of-vocabulary tokens.
            ids = [
                word2id_dict[tok] if tok in word2id_dict
                else word2id_dict["unk_"]
                for tok in ll[1:sequence_len + 1]
            ]
            # Pad short sequences with the -1 sentinel.
            ids.extend([-1] * (sequence_len - len(ids)))
            input_x.append(ids)
    return input_x, input_y


def model_rep(y_true, y_pred, average="micro"):
    """Report precision, recall and F1 for a set of predictions.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.
        average: sklearn averaging mode (default "micro").

    Returns:
        A (precision, recall, f1) tuple of floats.
    """
    metrics = (precision_score, recall_score, f1_score)
    return tuple(m(y_true, y_pred, average=average) for m in metrics)
